

************************************************
*
*Providing Status of plants from 2021 and 2022
*
*********************************************************

// 2021 data vintage
import excel "../Raw/GEM/January 2021 Global Coal Plant Tracker.xlsx", sheet("Units") firstrow clear 
keep TrackerID Status
replace Status=strtrim(Status)
replace Status=strupper(Status)
rename Status status_gcpt21
tempfile gcpt2021
save `gcpt2021'

//2022 data vintage
import excel "../Raw/GEM/Global-Coal-Plant-Tracker-Jan-2022", sheet("Units") firstrow clear 
merge 1:1 TrackerID using `gcpt2021', keep(1 3) nogen 

**clean data
keep TrackerID TrackerLOC ParentID Country Subnationalunitprovincestat Unit Plant Parent Status Year RETIRED PlannedRetire HeatrateBtuperkWh ///
Remainingplantlifetimeyears Combustiontechnology Coaltype CapacityMW Capacityfactor AnnualCO2milliontonnesann status_gcpt21
rename (Country Subnationalunitprovincestat Year RETIRED PlannedRetire) (PlantCountry PlantProvince EstYr RetiredYr PlanRetireYr)
replace EstYr="" if  missing(real(EstYr))==1
destring EstYr, replace
replace PlanRetireYr="" if  missing(real(PlanRetireYr))==1
destring PlanRetireYr, replace
replace Status=strtrim(Status)
replace Status=strupper(Status)


//look at status changes 
rename Status status_gcpt22
label var status_gcpt22 "Status 2022"
label var status_gcpt21 "Status 2021"

tempfile gcpt_2021_2022
save `gcpt_2021_2022'

*********************************************************
*
*GCPT Retirement 2017-2020
*
*********************************************************
clear
tempfile cumulator
quietly save `cumulator', emptyok

foreach sheet in "July 2017" "Jan 2018" "Jul 2018" "Jan 2019" "July 2019" "Jan 2020" {
	import excel "../Raw/GCPTs/2015 - 2020 GEM Global Coal Plant Tracker.xlsx", clear firstrow sheet(`sheet')
	gen sheet = "`sheet'"
	destring PlannedRetire, replace force
	rename PlannedRetire GCPT_planned_retire_year
	keep TrackerID  sheet GCPT_planned_retire_year 
	append using `cumulator'
	quietly save `cumulator', replace

}

import excel "../Raw/GCPTs/January 2021 Global Coal Plant Tracker.xlsx", clear sheet(Units) firstrow
	gen sheet = "Jan 2021"
	destring PlannedRetire, replace force
	rename PlannedRetire GCPT_planned_retire_year
	keep TrackerID  sheet GCPT_planned_retire_year 
	append using `cumulator'
	quietly save `cumulator', replace
	
	
import excel "../Raw/GCPTs/July 2021b Global Coal Plant Tracker.xlsx", clear sheet(Units) firstrow
	gen sheet = "July 2021"
	destring PlannedRetire, replace force
	rename PlannedRetire GCPT_planned_retire_year
	keep TrackerID  sheet GCPT_planned_retire_year 
	append using `cumulator'
	quietly save `cumulator', replace


import excel "../Raw/GCPTs/Global-Coal-Plant-Tracker-January-2022b (2).xlsx", clear sheet(Units) firstrow
	gen sheet = "Jan 2022"
	destring PlannedRetire, replace force
	rename PlannedRetire GCPT_planned_retire_year
	keep TrackerID  sheet GCPT_planned_retire_year 
	append using `cumulator'
	quietly save `cumulator', replace
	
import excel "../Raw/GCPTs/Global-Coal-Plant-Tracker-July-2022.xlsx", clear sheet(Units) firstrow
	gen sheet = "July 2022"
	destring PlannedRetire, replace force
	rename PlannedRetire GCPT_planned_retire_year
	keep TrackerID  sheet GCPT_planned_retire_year 
	append using `cumulator'
	quietly save `cumulator', replace


drop if missing(TrackerID)

gen year = substr(sheet, -4, .)
destring year, replace
egen gcpt_earl_annoucement__rtr_yr = min(year) if !missing(GCPT_planned_retire_year), by(TrackerID)



sort TrackerID year
egen tag = tag(GCPT_planned_retire_year TrackerID)
egen gcpt_retire_yr_switch = total(tag), by(TrackerID)
by TrackerID (sheet), sort: gen gcpt_n_changes = sum(GCPT_planned_retire_year != GCPT_planned_retire_year[_n-1] & GCPT_planned_retire_year != 1)
egen gcpt_first_plan =  min(GCPT_planned_retire_year) if gcpt_n_changes == 1, by(TrackerID)
egen gcpt_second_plan =  min(GCPT_planned_retire_year) if gcpt_n_changes == 2, by(TrackerID)
egen gcpt_third_plan =  min(GCPT_planned_retire_year) if gcpt_n_changes == 3, by(TrackerID)
drop tag



collapse (min) GCPT_first_retire_yr_plan = GCPT_planned_retire_year gcpt*, by(TrackerID)
drop if missing(GCPT_first_retire_yr_plan)

tempfile retire
save `retire'


*********************************************************
*
*Status Changes from 2014-2022
*
*********************************************************
import excel using "../Raw/GEM/Jan 2023 GCPT Status Changes - 2014 - 2022 (13).xlsx", clear firstrow

*Left Censor Data if had a status before data began 
gen left_cen = H22014 ~= "XXX"

*Renaming year
rename (Year RETIRED Country Subnationalunitprovincestat) (EstYr RetiredYr PlantCountry PlantProvince)

*Getting Most Recent Status
gen most_recent_status_2022 = H22022

*Turning EstYr to numbers
replace EstYr="" if  missing(real(EstYr))==1
destring EstYr, replace


*Shaping data long
reshape long H, i(TrackerID) j(temp_date)
rename H status

*Extracting date
gen first_digit = real(substr(string(temp_date, "%5.0g"), 1, 1))
gen day_of_half_year = 1 + (first_digit - 1) * 6
gen year = real(substr(string(temp_date), 2, 5))
gen date = mdy(day_of_half_year, 1, year)
format date %tdCCYY-NN-DD
drop first_digit day_of_half_year year temp_date

*Creating variable if zombie_plant (If mothball and then unmothball)
sort TrackerID date
by TrackerID: gen next_status = status[_n+1]
bysort TrackerID: egen zombie_plant = max(status == "Mothballed" & next_status == "Operating")
bysort TrackerID: egen dead_mothball_plant = max(status == "Mothballed" & next_status == "Retired")
drop next_status
bysort TrackerID: egen operating_2014 = max(date == mdy(7, 1, 2014) & status == "Operating")

*First and last date that plant retires/mothballs since data start
gen retired_mothballed_2014 = status == "Mothballed" | status == "Retired"
egen retired_moth_2014_first_date = min(cond(retired_mothballed_2014, date, .)), by(TrackerID)
format retired_moth_2014_first_date %tdCCYY-NN-DD
egen retired_moth_2014_last_date = max(cond(retired_mothballed_2014, date, .)), by(TrackerID)
format retired_moth_2014_last_date %tdCCYY-NN-DD
drop retired_mothballed_2014


*First date of each status
egen first_date_ = min(date), by(status TrackerID)
format first_date_ %tdCCYY-NN-DD


*Condense data
drop date
duplicates drop
replace status = subinstr(status, "Pre-permit", "pre_permit", .)

*reshaping data wide
reshape wide first_date_, i(TrackerID) j(status) string
gen est_year_date = mdy(1, 1, EstYr)
format est_year_date %tdCCYY-NN-DD

*Updating retired
gen retired_date = mdy(1, 1, RetiredYr)
format retired_date %tdCCYY-NN-DD
replace first_date_Retired = retired_date if retired_date < mdy(7, 1, 2014) & !missing(retired_date)
drop retired_date

*Removing established year if plant was canceled, not built yet, Mothballed
replace est_year_date = . if !missing(first_date_Cancelled) | est_year_date > mdy(7, 1, 2014) ///
	| first_date_Mothballed == mdy(7, 1, 2014)
	
*Replacing Operation with Established year
replace first_date_Operating = est_year_date if ~missing(est_year_date) & est_year_date < first_date_Operating

*Left Censor Variable
replace left_cen = (first_date_Operating < mdy(7, 1, 2014))*left_cen
drop if TrackerID == "G103638"



*********************************************************
*
*Merging data
*
*********************************************************
merge 1:1 TrackerID using `gcpt_2021_2022', nogen
merge 1:1 TrackerID using `retire', nogen

drop first_date_XXX 
replace first_date_Operating = mdy(1, 1, EstYr) if missing(first_date_Operating)
replace first_date_Retired = mdy(1, 1, RetiredYr) if missing(first_date_Retired)


save "../Intermediate/gcpt_clean.dta",replace








